library(RColorBrewer)
library(tidyverse)
library(knitr)
library(igraph)
library(DiagrammeR)
library(DiagrammeRsvg)
library(rsvg)
library(scales)
library(ggExtra)
library(data.table)
library(cowplot)

# User-level edge list; downstream code reads its `from`, `platform` and
# `posix` columns.
el <- data.table::fread(
  "data/processed/pwdprotect_edgelist_user.csv.gz"
)

# Normalised cross-platform names, restricted to the rows whose `db` field
# points at the candidates file.
candidate_names <- dplyr::filter(
  data.table::fread(
    "data/processed/pwdprotect_crossplatform_names_norm_and_filtered.csv.gz"
  ),
  db == "candidates_dummies_2.csv"
)

# Collapse an edge list to one row per user (`from`).
#
# For every user the result holds:
#   * `first_platform`: the `platform` of the row with the smallest `posix`
#     value (ties resolve to the first such row, as `which.min()` does);
#   * `fa`, `bl`, `mu`, `fo`: how many of the user's rows carry that platform
#     code.
#
# `edges` must provide the columns `from`, `platform` and `posix`.
summarise_user_platforms <- function(edges) {
  edges %>%
    dplyr::group_by(from) %>%
    dplyr::summarise(
      first_platform = platform[which.min(posix)],
      fa = sum(platform == "fa"),
      bl = sum(platform == "bl"),
      mu = sum(platform == "mu"),
      fo = sum(platform == "fo")
    )
}

# All users.
el_ordered.df <- summarise_user_platforms(el)

# Only users whose name appears in the candidate name list.
el_ordered_candidates.df <-
  el %>%
  dplyr::filter(from %in% candidate_names$normalised) %>%
  summarise_user_platforms()


# Turn a per-user table (columns `first_platform`, `fa`, `bl`, `mu`, `fo`)
# into a table of shares with one row per first platform.
#
# Each cell is the fraction of users starting on `first_platform` whose count
# for the column's platform exceeds a threshold:
#   * more than 1 for the first platform itself -- the record that made it
#     the first platform is already included in that count, so only a second
#     one shows further activity there;
#   * more than 0 for every other platform.
#
# Rows come out in the order fa, bl, mu, fo; a platform that nobody started on
# contributes no row.
summarise_transitions <- function(per_user) {
  platforms <- c("fa", "bl", "mu", "fo")

  shares_from <- function(origin) {
    # 1 for the origin platform, 0 for the three others.
    min_count <- setNames(as.integer(platforms == origin), platforms)

    per_user %>%
      dplyr::filter(first_platform == origin) %>%
      dplyr::group_by(first_platform) %>%
      dplyr::summarise(
        fa = sum(fa > min_count[["fa"]]) / dplyr::n(),
        bl = sum(bl > min_count[["bl"]]) / dplyr::n(),
        mu = sum(mu > min_count[["mu"]]) / dplyr::n(),
        fo = sum(fo > min_count[["fo"]]) / dplyr::n()
      )
  }

  dplyr::bind_rows(lapply(platforms, shares_from))
}


# All users: wide share table, then one row per (first platform, platform)
# pair with columns `first_platform`, `name` and `value`.
el_ordered.df <- summarise_transitions(el_ordered.df)

el_ordered_long.df <-
  el_ordered.df %>%
  tidyr::pivot_longer(cols = fa:fo)


# Same two tables for the candidate subset.
el_ordered_candidates.df <- summarise_transitions(el_ordered_candidates.df)

el_ordered_candidates_long.df <-
  el_ordered_candidates.df %>%
  tidyr::pivot_longer(cols = fa:fo)

# Print the all-users table, then the candidates table, formatted by kable().
knitr::kable(el_ordered.df)

knitr::kable(el_ordered_candidates.df)

# Node ids keyed by platform code; the order matches the node labels below
# (1 = Forum, 2 = Facebook, 3 = Meetup, 4 = Blog).
these_names <-
  setNames(1:4, c("fo", "fa", "mu", "bl"))

nodes.df <-
  create_node_df(n = 4,
                 label = c("Forum", "Facebook", "Meetup", "Blog"))

# Edge data frame for a long share table: one edge per row, running from the
# node of `first_platform` to the node of `name`.
transition_edges <- function(long_df) {
  create_edge_df(from = these_names[long_df$first_platform],
                 to = these_names[long_df$name])
}

# Line width for an edge carrying `share` (a fraction between 0 and 1): the
# natural log of the percentage. The log is negative below 1% and -Inf at 0%,
# neither of which is a valid Graphviz penwidth, so the result is floored at 0.
edge_penwidth <- function(share) {
  pmax(log(share * 100), 0)
}

# Build and render the platform graph for one set of edges.
#
# `edges`  edge data frame whose rows are in the same order as `shares`
# `shares` fraction per edge; shown as a percentage label rounded to one
#          decimal and used to scale the edge width
# `title`  title passed to render_graph()
render_transition_graph <- function(edges, shares, title) {
  create_graph() %>%
    add_global_graph_attrs(
      attr = "overlap",
      value = "false",
      attr_type = "graph") %>%
    add_node_df(nodes.df) %>%
    add_edge_df(edges) %>%
    set_edge_attrs("label", value = paste0(round(shares * 100, 1), "%")) %>%
    set_edge_attrs("penwidth", value = edge_penwidth(shares)) %>%
    render_graph(layout = "kk", title = title)
}

# Write a rendered graph to `<path_stem>.svg` and `<path_stem>.png`. The SVG
# is serialised once and reused for both files.
export_figure <- function(graph, path_stem) {
  svg_raw <- charToRaw(export_svg(graph))
  rsvg_svg(svg_raw, paste0(path_stem, ".svg"))
  rsvg_png(svg_raw, paste0(path_stem, ".png"))
}

# Figure 3a, panel 1: all users.
edges_1.df <- transition_edges(el_ordered_long.df)

graph_1 <-
  render_transition_graph(edges_1.df,
                          el_ordered_long.df$value,
                          title = "All users")

export_figure(graph_1, "output/figures/figure_3_a_1")

# Figure 3a, panel 2: candidate subset.
edges_2.df <- transition_edges(el_ordered_candidates_long.df)

graph_2 <-
  render_transition_graph(edges_2.df,
                          el_ordered_candidates_long.df$value,
                          title = "2012 primary candidates")

export_figure(graph_2, "output/figures/figure_3_a_2")